library(scMET)
library(data.table)
library(matrixStats)
library(coda)
library(ggpubr)
library(purrr)
set.seed(1234)
data_dir <- "~/datasets/scMET_ms/synthetic/diff_var/data/rep1/"
out_dir <- "~/datasets/scMET_ms/synthetic/shrinkage/"
if (!dir.exists(out_dir)) {dir.create(out_dir, recursive = TRUE)}
opts <- list()
opts$N_feat <- 300
opts$N_cells <- c(20, 50, 200)
opts$N_cpgs <- c(8, 15, 50)
opts$OR_change_gamma <- 2summary_stats <- data.table(cells = numeric(), features = numeric(),
cpgs = numeric(), or_gamma = numeric(),
feature_name = character(), gamma_true = numeric(),
gamma_median = numeric(), mu_true = numeric(),
mu_median = numeric(), gamma_mle = numeric(),
mu_mle = numeric(), cpg_cov = numeric())
for (f in opts$N_feat) {
for (cpg in opts$N_cpgs) {
for (c in opts$N_cells) {
for (or in opts$OR_change_gamma) {
dt <- readRDS(file = paste0(data_dir, "scmet_ORgamma", or, "_feat", f,
"_cells", c, "_cpgs", cpg, "_mcmcFALSE.rds"))
fit_obj <- dt[[paste0("scmet_A")]]
sim_dt <- dt$sim_dt[[paste0("scmet_dt_A")]]
mle <- sim_dt$Y[, bb_mle(cbind(total_reads, met_reads))[c("gamma", "mu")],
by = c("Feature")]
summary_stats <- rbind(summary_stats,
data.table(cells = c,
features = f,
cpgs = cpg,
or_gamma = or,
feature_name = fit_obj$feature_names,
# gamma
gamma_true = sim_dt$theta_true$gamma,
gamma_median = colMedians(fit_obj$posterior$gamma),
# mu
mu_true = sim_dt$theta_true$mu,
mu_median = colMedians(fit_obj$posterior$mu),
# MLE estimates
gamma_mle = mle$gamma,
mu_mle = mle$mu,
# CpG coverage per feature
cpg_cov = sim_dt$Y[, median(total_reads),
by = "Feature"]$V1 ) )
}
}
}
}
rm(mle, fit_obj, sim_dt, dt, f, cpg, c, or)tmp <- copy(summary_stats)
tmp$cells <- factor(tmp$cells, levels = unique(tmp$cells),
labels = paste(unique(tmp$cells), "cells"))
#tmp$gamma_median[tmp$gamma_median > 0.6] <- 0.58
#tmp$gamma_mle[tmp$gamma_mle > 0.6] <- 0.58
gg_overdisp <- list()
iter <- 1
for (cpg in opts$N_cpgs) {
dt <- tmp[features == opts$N_feat & cpgs == cpg]
dt <- dt %>% split(., by = "cells") %>%
map(~ .[sample(150)]) %>%
rbindlist
if (cpg == 8) {
gg_overdisp[[iter]] <- shrinkage_overdisp_plot(dt, x_lab = "True overdispersion (CpG poor, average 5 CpGs)")
}else if (cpg == 15) {
gg_overdisp[[iter]] <- shrinkage_overdisp_plot(dt, x_lab = "True overdispersion (CpG moderate, average 10 CpGs)")
} else {
gg_overdisp[[iter]] <- shrinkage_overdisp_plot(dt, x_lab = "True overdispersion (CpG rich, average 30 CpGs)") +
theme(legend.position = "none")
}
iter <- iter + 1
}
print(cowplot::plot_grid(plotlist = gg_overdisp, nrow = 3,
labels = c("a", "b", "c"), label_size = 18))tmp <- copy(summary_stats)
tmp$cells <- factor(tmp$cells, levels = unique(tmp$cells),
labels = paste(unique(tmp$cells), "cells"))
gg_mean <- list()
iter <- 1
for (cpg in opts$N_cpgs) {
dt <- tmp[features == opts$N_feat & cpgs == cpg]
dt <- dt %>% split(., by = "cells") %>%
map(~ .[sample(150)]) %>%
rbindlist
if (cpg == 8) {
gg_mean[[iter]] <- shrinkage_mean_plots(dt, x_lab = "True mean methylation (CpG poor, average 5 CpGs)")
}else if (cpg == 15) {
gg_mean[[iter]] <- shrinkage_mean_plots(dt, x_lab = "True mean methylation (CpG moderate, average 10 CpGs)")
} else {
gg_mean[[iter]] <- shrinkage_mean_plots(dt, x_lab = "True mean methylation (CpG rich, average 30 CpGs)") +
theme(legend.position = "none")
}
iter <- iter + 1
}
print(cowplot::plot_grid(plotlist = gg_mean, nrow = 3,
labels = c("a", "b", "c"), label_size = 18))opts$val_thresh <- 1e-2
tmp <- copy(summary_stats) %>% .[, c("features", "or_gamma", "cpg_cov") := NULL]
to_plot <- tmp[, c("cells", "cpgs", "feature_name", "gamma_true",
"gamma_median", "gamma_mle")] %>%
.[, gamma_mle := ifelse(gamma_mle > opts$val_thresh &
gamma_mle < 1 - opts$val_thresh, gamma_mle,
ifelse(gamma_mle < opts$val_thresh,
opts$val_thresh, 1 - opts$val_thresh))] %>%
.[, scmet_lor := abs(scMET:::.compute_log_odds_ratio(gamma_true, gamma_median))] %>%
.[, mle_lor := abs(scMET:::.compute_log_odds_ratio(gamma_true, gamma_mle))] %>%
.[, c("gamma_true", "gamma_median", "gamma_mle", "feature_name") := NULL] %>%
melt(id.vars = c("cells", "cpgs"))
to_plot$cpgs <- factor(to_plot$cpgs, levels = c(8, 15, 50),
labels = c("CpG poor", "CpG moderate", "CpG rich"))
to_plot$variable <- factor(to_plot$variable, levels = c("mle_lor", "scmet_lor"),
labels = c("BB MLE", "scMET"))
gg_overdisp_lor <- ggboxplot(to_plot, x = "cells", y = "value",
fill = "variable", lwd = 0.4, outlier.size = 0.5) +
facet_wrap(~cpgs, scales = "fixed", nrow = 3) +
scale_fill_manual(values = c( "#999999", "#E69F00", "#56B4E9")) +
labs(title = NULL, x = "Number of cells",
y = expression(paste("LOR (", gamma[true], ", ",
gamma[estimated], ")")), fill = "Model") +
theme_classic() +
ylim(c(0, 1.8)) +
theme(
legend.position = "top",
legend.title = element_blank(),
legend.margin = margin(-2, 0, -2, 0),
legend.box.margin = margin(-2, 0, -2, 0),
panel.spacing.y = unit(3, "lines"),
plot.tag = element_text(color = "black", face = "bold", size = rel(1.6)),
legend.text = element_text(color = "black", size = rel(1.1)),
strip.text = element_text(color = "black", size = rel(1.1)),
axis.text = element_text(color = "black", size = rel(0.8)),
axis.title = element_text(color = "black", size = rel(1.1))
)
print(gg_overdisp_lor)opts$val_thresh <- 1e-10
tmp <- copy(summary_stats) %>% .[, c("features", "or_gamma", "cpg_cov") := NULL]
to_plot <- tmp[, c("cells", "cpgs", "feature_name",
"mu_true", "mu_median", "mu_mle")] %>%
.[, mu_mle := ifelse(mu_mle > opts$val_thresh &
mu_mle < 1 - opts$val_thresh, mu_mle,
ifelse(mu_mle < opts$val_thresh,
opts$val_thresh, 1 - opts$val_thresh))] %>%
.[, mle_lor := abs(scMET:::.compute_log_odds_ratio(mu_true, mu_mle))] %>%
.[, scmet_lor := abs(scMET:::.compute_log_odds_ratio(mu_true, mu_median))] %>%
.[, c("mu_true", "mu_median", "mu_mle", "feature_name") := NULL] %>%
melt(id.vars = c("cells", "cpgs"))
to_plot$cpgs <- factor(to_plot$cpgs, levels = c(8, 15, 50),
labels = c("CpG poor", "CpG moderate", "CpG rich"))
to_plot$variable <- factor(to_plot$variable, levels = c("mle_lor", "scmet_lor"),
labels = c("BB MLE", "scMET"))
gg_mean_lor <- ggboxplot(to_plot, x = "cells", y = "value",
fill = "variable", lwd = 0.4, outlier.size = 0.5) +
facet_wrap(~cpgs, scales = "fixed", nrow = 3) +
scale_fill_manual(values = c( "#999999", "#E69F00", "#56B4E9")) +
labs(title = NULL, x = "Number of cells",
y = expression(paste("LOR (", mu[true], ", ",
mu[estimated], ")")), fill = "Model") +
theme_classic() +
ylim(c(0, 1.8)) +
theme(
legend.position = "top",
legend.title = element_blank(),
legend.margin = margin(-2, 0, -2, 0),
legend.box.margin = margin(-2, 0, -2, 0),
panel.spacing.y = unit(3, "lines"),
plot.tag = element_text(color = "black", face = "bold", size = rel(1.6)),
legend.text = element_text(color = "black", size = rel(1.1)),
strip.text = element_text(color = "black", size = rel(1.1)),
axis.text = element_text(color = "black", size = rel(0.8)),
axis.title = element_text(color = "black", size = rel(1.1))
)
print(gg_mean_lor)library(patchwork)
gg_gamma <- ((gg_overdisp[[1]]/gg_overdisp[[2]]/gg_overdisp[[3]]) | gg_overdisp_lor) +
plot_layout(widths = c(3.7, 1.2), heights = c(1.08, 1)) +
plot_annotation(tag_levels = 'a')
print(gg_gamma)
pdf(file = paste0(out_dir, "shrinkage_overdisp.pdf"), width = 10, height = 9)print(gg_gamma)
dev.off()gg_mu <- ((gg_mean[[1]]/gg_mean[[2]]/gg_mean[[3]]) | gg_mean_lor) +
plot_layout(widths = c(3.7, 1.2), , heights = c(1.08, 1)) +
plot_annotation(tag_levels = 'a')
print(gg_mu)
pdf(file = paste0(out_dir, "shrinkage_mean.pdf"), width = 10, height = 9)print(gg_mu)
dev.off()